{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Feature extraction with tsfresh transformer\n",
    "\n",
    "In this tutorial, we show how to use sktime with [tsfresh](https://tsfresh.readthedocs.io) to extract features from time series, so that any scikit-learn estimator can then be applied to them.\n",
    "\n",
    "## Preliminaries\n",
    "You have to install tsfresh if you haven't already. To install it, uncomment and run the cell below:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-12-19T14:30:39.713903Z",
     "iopub.status.busy": "2020-12-19T14:30:39.713342Z",
     "iopub.status.idle": "2020-12-19T14:30:39.715128Z",
     "shell.execute_reply": "2020-12-19T14:30:39.715641Z"
    }
   },
   "outputs": [],
   "source": [
    "# %pip install --upgrade tsfresh"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-12-19T14:30:39.719083Z",
     "iopub.status.busy": "2020-12-19T14:30:39.718586Z",
     "iopub.status.idle": "2020-12-19T14:30:40.743724Z",
     "shell.execute_reply": "2020-12-19T14:30:40.744213Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.pipeline import make_pipeline\n",
    "\n",
    "from sktime.datasets import load_arrow_head, load_basic_motions\n",
    "from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Univariate time series classification data\n",
    "\n",
    "For more details on the data set, see the [univariate time series classification notebook](https://github.com/alan-turing-institute/sktime/blob/main/examples/02_classification_univariate.ipynb)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-12-19T14:30:40.748159Z",
     "iopub.status.busy": "2020-12-19T14:30:40.747656Z",
     "iopub.status.idle": "2020-12-19T14:30:40.795200Z",
     "shell.execute_reply": "2020-12-19T14:30:40.795889Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(158, 1) (158,) (53, 1) (53,)\n"
     ]
    }
   ],
   "source": [
    "# Load the univariate ArrowHead panel and hold out a test set. The split is\n",
    "# seeded so that the partition is identical on every Restart & Run All.\n",
    "X, y = load_arrow_head(return_X_y=True)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\n",
    "print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-12-19T14:30:40.808841Z",
     "iopub.status.busy": "2020-12-19T14:30:40.808198Z",
     "iopub.status.idle": "2020-12-19T14:30:40.816155Z",
     "shell.execute_reply": "2020-12-19T14:30:40.816682Z"
    },
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dim_0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>0     -1.7998\n",
       "1     -1.7987\n",
       "2     -1.7942\n",
       "3   ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>103</th>\n",
       "      <td>0     -1.8091\n",
       "1     -1.8067\n",
       "2     -1.7866\n",
       "3   ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>0     -2.0417\n",
       "1     -2.0572\n",
       "2     -2.0522\n",
       "3   ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0     -2.1888\n",
       "1     -2.1855\n",
       "2     -2.1765\n",
       "3   ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121</th>\n",
       "      <td>0     -1.9586\n",
       "1     -1.9371\n",
       "2     -1.8798\n",
       "3   ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 dim_0\n",
       "69   0     -1.7998\n",
       "1     -1.7987\n",
       "2     -1.7942\n",
       "3   ...\n",
       "103  0     -1.8091\n",
       "1     -1.8067\n",
       "2     -1.7866\n",
       "3   ...\n",
       "34   0     -2.0417\n",
       "1     -2.0572\n",
       "2     -2.0522\n",
       "3   ...\n",
       "14   0     -2.1888\n",
       "1     -2.1855\n",
       "2     -2.1765\n",
       "3   ...\n",
       "121  0     -1.9586\n",
       "1     -1.9371\n",
       "2     -1.8798\n",
       "3   ..."
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-12-19T14:30:40.820002Z",
     "iopub.status.busy": "2020-12-19T14:30:40.819515Z",
     "iopub.status.idle": "2020-12-19T14:30:40.821979Z",
     "shell.execute_reply": "2020-12-19T14:30:40.822517Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['0', '1', '2'], dtype=object)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# multiclass classification task (three classes)\n",
    "np.unique(y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using tsfresh to extract features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-12-19T14:30:40.829452Z",
     "iopub.status.busy": "2020-12-19T14:30:40.828907Z",
     "iopub.status.idle": "2020-12-19T14:30:53.049755Z",
     "shell.execute_reply": "2020-12-19T14:30:53.050249Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/mloning/Documents/Research/software/sktime/sktime/sktime/transformations/panel/tsfresh.py:164: UserWarning: tsfresh requires a unique index, but found non-unique. To avoid this warning, please make sure the index of X contains only unique values.\n",
      "  \"tsfresh requires a unique index, but found \"\n",
      "Feature Extraction: 100%|██████████| 5/5 [00:10<00:00,  2.05s/it]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dim_0__variance_larger_than_standard_deviation</th>\n",
       "      <th>dim_0__has_duplicate_max</th>\n",
       "      <th>dim_0__has_duplicate_min</th>\n",
       "      <th>dim_0__has_duplicate</th>\n",
       "      <th>dim_0__sum_values</th>\n",
       "      <th>dim_0__abs_energy</th>\n",
       "      <th>dim_0__mean_abs_change</th>\n",
       "      <th>dim_0__mean_change</th>\n",
       "      <th>dim_0__mean_second_derivative_central</th>\n",
       "      <th>dim_0__median</th>\n",
       "      <th>...</th>\n",
       "      <th>dim_0__fourier_entropy__bins_2</th>\n",
       "      <th>dim_0__fourier_entropy__bins_3</th>\n",
       "      <th>dim_0__fourier_entropy__bins_5</th>\n",
       "      <th>dim_0__fourier_entropy__bins_10</th>\n",
       "      <th>dim_0__fourier_entropy__bins_100</th>\n",
       "      <th>dim_0__permutation_entropy__dimension_3__tau_1</th>\n",
       "      <th>dim_0__permutation_entropy__dimension_4__tau_1</th>\n",
       "      <th>dim_0__permutation_entropy__dimension_5__tau_1</th>\n",
       "      <th>dim_0__permutation_entropy__dimension_6__tau_1</th>\n",
       "      <th>dim_0__permutation_entropy__dimension_7__tau_1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-0.000080</td>\n",
       "      <td>249.998516</td>\n",
       "      <td>0.052357</td>\n",
       "      <td>-0.000001</td>\n",
       "      <td>-0.000005</td>\n",
       "      <td>-0.024066</td>\n",
       "      <td>...</td>\n",
       "      <td>0.046288</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.250609</td>\n",
       "      <td>1.323194</td>\n",
       "      <td>1.819631</td>\n",
       "      <td>2.183824</td>\n",
       "      <td>2.463220</td>\n",
       "      <td>2.707387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-0.000525</td>\n",
       "      <td>250.000756</td>\n",
       "      <td>0.049118</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.000006</td>\n",
       "      <td>-0.031622</td>\n",
       "      <td>...</td>\n",
       "      <td>0.046288</td>\n",
       "      <td>0.046288</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.184769</td>\n",
       "      <td>1.213529</td>\n",
       "      <td>1.668744</td>\n",
       "      <td>2.081159</td>\n",
       "      <td>2.418614</td>\n",
       "      <td>2.707518</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-0.000034</td>\n",
       "      <td>249.998998</td>\n",
       "      <td>0.069971</td>\n",
       "      <td>0.000084</td>\n",
       "      <td>0.000025</td>\n",
       "      <td>0.018880</td>\n",
       "      <td>...</td>\n",
       "      <td>0.081510</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.138673</td>\n",
       "      <td>0.311663</td>\n",
       "      <td>1.116706</td>\n",
       "      <td>1.545256</td>\n",
       "      <td>1.889777</td>\n",
       "      <td>2.155644</td>\n",
       "      <td>2.374722</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.000202</td>\n",
       "      <td>249.999702</td>\n",
       "      <td>0.067601</td>\n",
       "      <td>-0.000002</td>\n",
       "      <td>-0.000010</td>\n",
       "      <td>0.384770</td>\n",
       "      <td>...</td>\n",
       "      <td>0.046288</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.204643</td>\n",
       "      <td>0.414263</td>\n",
       "      <td>1.323315</td>\n",
       "      <td>1.915330</td>\n",
       "      <td>2.406197</td>\n",
       "      <td>2.794719</td>\n",
       "      <td>3.117007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-0.000146</td>\n",
       "      <td>249.998674</td>\n",
       "      <td>0.050355</td>\n",
       "      <td>-0.000004</td>\n",
       "      <td>-0.000046</td>\n",
       "      <td>-0.045353</td>\n",
       "      <td>...</td>\n",
       "      <td>0.046288</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.092513</td>\n",
       "      <td>0.230801</td>\n",
       "      <td>1.173933</td>\n",
       "      <td>1.628543</td>\n",
       "      <td>2.003443</td>\n",
       "      <td>2.303091</td>\n",
       "      <td>2.559695</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 773 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   dim_0__variance_larger_than_standard_deviation  dim_0__has_duplicate_max  \\\n",
       "0                                             0.0                       0.0   \n",
       "1                                             0.0                       0.0   \n",
       "2                                             0.0                       0.0   \n",
       "3                                             0.0                       0.0   \n",
       "4                                             0.0                       0.0   \n",
       "\n",
       "   dim_0__has_duplicate_min  dim_0__has_duplicate  dim_0__sum_values  \\\n",
       "0                       0.0                   1.0          -0.000080   \n",
       "1                       1.0                   1.0          -0.000525   \n",
       "2                       0.0                   1.0          -0.000034   \n",
       "3                       0.0                   1.0           0.000202   \n",
       "4                       0.0                   1.0          -0.000146   \n",
       "\n",
       "   dim_0__abs_energy  dim_0__mean_abs_change  dim_0__mean_change  \\\n",
       "0         249.998516                0.052357           -0.000001   \n",
       "1         250.000756                0.049118            0.000000   \n",
       "2         249.998998                0.069971            0.000084   \n",
       "3         249.999702                0.067601           -0.000002   \n",
       "4         249.998674                0.050355           -0.000004   \n",
       "\n",
       "   dim_0__mean_second_derivative_central  dim_0__median  ...  \\\n",
       "0                              -0.000005      -0.024066  ...   \n",
       "1                              -0.000006      -0.031622  ...   \n",
       "2                               0.000025       0.018880  ...   \n",
       "3                              -0.000010       0.384770  ...   \n",
       "4                              -0.000046      -0.045353  ...   \n",
       "\n",
       "   dim_0__fourier_entropy__bins_2  dim_0__fourier_entropy__bins_3  \\\n",
       "0                        0.046288                        0.092513   \n",
       "1                        0.046288                        0.046288   \n",
       "2                        0.081510                        0.092513   \n",
       "3                        0.046288                        0.092513   \n",
       "4                        0.046288                        0.092513   \n",
       "\n",
       "   dim_0__fourier_entropy__bins_5  dim_0__fourier_entropy__bins_10  \\\n",
       "0                        0.092513                         0.092513   \n",
       "1                        0.092513                         0.092513   \n",
       "2                        0.092513                         0.138673   \n",
       "3                        0.092513                         0.204643   \n",
       "4                        0.092513                         0.092513   \n",
       "\n",
       "   dim_0__fourier_entropy__bins_100  \\\n",
       "0                          0.250609   \n",
       "1                          0.184769   \n",
       "2                          0.311663   \n",
       "3                          0.414263   \n",
       "4                          0.230801   \n",
       "\n",
       "   dim_0__permutation_entropy__dimension_3__tau_1  \\\n",
       "0                                        1.323194   \n",
       "1                                        1.213529   \n",
       "2                                        1.116706   \n",
       "3                                        1.323315   \n",
       "4                                        1.173933   \n",
       "\n",
       "   dim_0__permutation_entropy__dimension_4__tau_1  \\\n",
       "0                                        1.819631   \n",
       "1                                        1.668744   \n",
       "2                                        1.545256   \n",
       "3                                        1.915330   \n",
       "4                                        1.628543   \n",
       "\n",
       "   dim_0__permutation_entropy__dimension_5__tau_1  \\\n",
       "0                                        2.183824   \n",
       "1                                        2.081159   \n",
       "2                                        1.889777   \n",
       "3                                        2.406197   \n",
       "4                                        2.003443   \n",
       "\n",
       "   dim_0__permutation_entropy__dimension_6__tau_1  \\\n",
       "0                                        2.463220   \n",
       "1                                        2.418614   \n",
       "2                                        2.155644   \n",
       "3                                        2.794719   \n",
       "4                                        2.303091   \n",
       "\n",
       "   dim_0__permutation_entropy__dimension_7__tau_1  \n",
       "0                                        2.707387  \n",
       "1                                        2.707518  \n",
       "2                                        2.374722  \n",
       "3                                        3.117007  \n",
       "4                                        2.559695  \n",
       "\n",
       "[5 rows x 773 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the \"efficient\" setting skips tsfresh's most expensive feature calculators\n",
    "t = TSFreshFeatureExtractor(default_fc_parameters=\"efficient\", show_warnings=False)\n",
    "Xt = t.fit_transform(X_train)\n",
    "Xt.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using tsfresh with sktime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-12-19T14:30:53.062147Z",
     "iopub.status.busy": "2020-12-19T14:30:53.061631Z",
     "iopub.status.idle": "2020-12-19T14:31:09.307275Z",
     "shell.execute_reply": "2020-12-19T14:31:09.307781Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/mloning/Documents/Research/software/sktime/sktime/sktime/transformations/panel/tsfresh.py:164: UserWarning: tsfresh requires a unique index, but found non-unique. To avoid this warning, please make sure the index of X contains only unique values.\n",
      "  \"tsfresh requires a unique index, but found \"\n",
      "Feature Extraction: 100%|██████████| 5/5 [00:11<00:00,  2.21s/it]\n",
      "/Users/mloning/Documents/Research/software/sktime/sktime/sktime/transformations/panel/tsfresh.py:164: UserWarning: tsfresh requires a unique index, but found non-unique. To avoid this warning, please make sure the index of X contains only unique values.\n",
      "  \"tsfresh requires a unique index, but found \"\n",
      "Feature Extraction: 100%|██████████| 5/5 [00:03<00:00,  1.45it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0.8490566037735849"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Chain the tsfresh feature extractor with a scikit-learn classifier. The\n",
    "# forest is seeded so that model fitting is deterministic for a given\n",
    "# train/test split.\n",
    "classifier = make_pipeline(\n",
    "    TSFreshFeatureExtractor(default_fc_parameters=\"efficient\", show_warnings=False),\n",
    "    RandomForestClassifier(random_state=42),\n",
    ")\n",
    "classifier.fit(X_train, y_train)\n",
    "classifier.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multivariate time series classification data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-12-19T14:31:09.311742Z",
     "iopub.status.busy": "2020-12-19T14:31:09.311092Z",
     "iopub.status.idle": "2020-12-19T14:31:09.380791Z",
     "shell.execute_reply": "2020-12-19T14:31:09.381304Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(60, 6) (60,) (20, 6) (20,)\n"
     ]
    }
   ],
   "source": [
    "# Load the multivariate BasicMotions panel and hold out a test set. The split\n",
    "# is seeded so that the partition is identical on every Restart & Run All.\n",
    "X, y = load_basic_motions(return_X_y=True)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\n",
    "print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-12-19T14:31:09.425476Z",
     "iopub.status.busy": "2020-12-19T14:31:09.424972Z",
     "iopub.status.idle": "2020-12-19T14:31:09.427185Z",
     "shell.execute_reply": "2020-12-19T14:31:09.427741Z"
    },
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dim_0</th>\n",
       "      <th>dim_1</th>\n",
       "      <th>dim_2</th>\n",
       "      <th>dim_3</th>\n",
       "      <th>dim_4</th>\n",
       "      <th>dim_5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>0    -0.294498\n",
       "1    -0.294498\n",
       "2    -0.050044\n",
       "3...</td>\n",
       "      <td>0     0.540218\n",
       "1     0.540218\n",
       "2    -0.515245\n",
       "3...</td>\n",
       "      <td>0     0.218114\n",
       "1     0.218114\n",
       "2    -0.301108\n",
       "3...</td>\n",
       "      <td>0    -0.045277\n",
       "1    -0.045277\n",
       "2     0.103872\n",
       "3...</td>\n",
       "      <td>0    -0.002663\n",
       "1    -0.002663\n",
       "2    -0.183773\n",
       "3...</td>\n",
       "      <td>0     0.031960\n",
       "1     0.031960\n",
       "2     0.037287\n",
       "3...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>0    -0.761604\n",
       "1    -0.761604\n",
       "2     0.121078\n",
       "3...</td>\n",
       "      <td>0     0.260125\n",
       "1     0.260125\n",
       "2    -1.423255\n",
       "3...</td>\n",
       "      <td>0    -0.064487\n",
       "1    -0.064487\n",
       "2     0.075600\n",
       "3...</td>\n",
       "      <td>0     0.069248\n",
       "1     0.069248\n",
       "2    -0.282318\n",
       "3...</td>\n",
       "      <td>0     0.242367\n",
       "1     0.242367\n",
       "2    -0.332922\n",
       "3...</td>\n",
       "      <td>0    -0.007990\n",
       "1    -0.007990\n",
       "2     0.239704\n",
       "3...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0    -0.352746\n",
       "1    -0.352746\n",
       "2    -1.354561\n",
       "3...</td>\n",
       "      <td>0     0.316845\n",
       "1     0.316845\n",
       "2     0.490525\n",
       "3...</td>\n",
       "      <td>0    -0.473779\n",
       "1    -0.473779\n",
       "2     1.454261\n",
       "3...</td>\n",
       "      <td>0    -0.327595\n",
       "1    -0.327595\n",
       "2    -0.269001\n",
       "3...</td>\n",
       "      <td>0     0.106535\n",
       "1     0.106535\n",
       "2     0.021307\n",
       "3...</td>\n",
       "      <td>0     0.197090\n",
       "1     0.197090\n",
       "2     0.460763\n",
       "3...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0    -0.342233\n",
       "1    -0.342233\n",
       "2    -0.298542\n",
       "3...</td>\n",
       "      <td>0     0.327415\n",
       "1     0.327415\n",
       "2    -0.527154\n",
       "3...</td>\n",
       "      <td>0     0.157229\n",
       "1     0.157229\n",
       "2     0.248585\n",
       "3...</td>\n",
       "      <td>0     0.394179\n",
       "1     0.394179\n",
       "2    -0.037287\n",
       "3...</td>\n",
       "      <td>0     0.074574\n",
       "1     0.074574\n",
       "2    -0.087891\n",
       "3...</td>\n",
       "      <td>0    -0.037287\n",
       "1    -0.037287\n",
       "2    -0.050604\n",
       "3...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0      0.206148\n",
       "1      0.206148\n",
       "2      6.53436...</td>\n",
       "      <td>0    -0.658294\n",
       "1    -0.658294\n",
       "2     4.597327\n",
       "3...</td>\n",
       "      <td>0     0.469612\n",
       "1     0.469612\n",
       "2    -2.723661\n",
       "3...</td>\n",
       "      <td>0    -0.106535\n",
       "1    -0.106535\n",
       "2    -0.439456\n",
       "3...</td>\n",
       "      <td>0     0.306288\n",
       "1     0.306288\n",
       "2     1.717875\n",
       "3...</td>\n",
       "      <td>0     0.950824\n",
       "1     0.950824\n",
       "2    -1.041379\n",
       "3...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                dim_0  \\\n",
       "20  0    -0.294498\n",
       "1    -0.294498\n",
       "2    -0.050044\n",
       "3...   \n",
       "26  0    -0.761604\n",
       "1    -0.761604\n",
       "2     0.121078\n",
       "3...   \n",
       "7   0    -0.352746\n",
       "1    -0.352746\n",
       "2    -1.354561\n",
       "3...   \n",
       "8   0    -0.342233\n",
       "1    -0.342233\n",
       "2    -0.298542\n",
       "3...   \n",
       "10  0      0.206148\n",
       "1      0.206148\n",
       "2      6.53436...   \n",
       "\n",
       "                                                dim_1  \\\n",
       "20  0     0.540218\n",
       "1     0.540218\n",
       "2    -0.515245\n",
       "3...   \n",
       "26  0     0.260125\n",
       "1     0.260125\n",
       "2    -1.423255\n",
       "3...   \n",
       "7   0     0.316845\n",
       "1     0.316845\n",
       "2     0.490525\n",
       "3...   \n",
       "8   0     0.327415\n",
       "1     0.327415\n",
       "2    -0.527154\n",
       "3...   \n",
       "10  0    -0.658294\n",
       "1    -0.658294\n",
       "2     4.597327\n",
       "3...   \n",
       "\n",
       "                                                dim_2  \\\n",
       "20  0     0.218114\n",
       "1     0.218114\n",
       "2    -0.301108\n",
       "3...   \n",
       "26  0    -0.064487\n",
       "1    -0.064487\n",
       "2     0.075600\n",
       "3...   \n",
       "7   0    -0.473779\n",
       "1    -0.473779\n",
       "2     1.454261\n",
       "3...   \n",
       "8   0     0.157229\n",
       "1     0.157229\n",
       "2     0.248585\n",
       "3...   \n",
       "10  0     0.469612\n",
       "1     0.469612\n",
       "2    -2.723661\n",
       "3...   \n",
       "\n",
       "                                                dim_3  \\\n",
       "20  0    -0.045277\n",
       "1    -0.045277\n",
       "2     0.103872\n",
       "3...   \n",
       "26  0     0.069248\n",
       "1     0.069248\n",
       "2    -0.282318\n",
       "3...   \n",
       "7   0    -0.327595\n",
       "1    -0.327595\n",
       "2    -0.269001\n",
       "3...   \n",
       "8   0     0.394179\n",
       "1     0.394179\n",
       "2    -0.037287\n",
       "3...   \n",
       "10  0    -0.106535\n",
       "1    -0.106535\n",
       "2    -0.439456\n",
       "3...   \n",
       "\n",
       "                                                dim_4  \\\n",
       "20  0    -0.002663\n",
       "1    -0.002663\n",
       "2    -0.183773\n",
       "3...   \n",
       "26  0     0.242367\n",
       "1     0.242367\n",
       "2    -0.332922\n",
       "3...   \n",
       "7   0     0.106535\n",
       "1     0.106535\n",
       "2     0.021307\n",
       "3...   \n",
       "8   0     0.074574\n",
       "1     0.074574\n",
       "2    -0.087891\n",
       "3...   \n",
       "10  0     0.306288\n",
       "1     0.306288\n",
       "2     1.717875\n",
       "3...   \n",
       "\n",
       "                                                dim_5  \n",
       "20  0     0.031960\n",
       "1     0.031960\n",
       "2     0.037287\n",
       "3...  \n",
       "26  0    -0.007990\n",
       "1    -0.007990\n",
       "2     0.239704\n",
       "3...  \n",
       "7   0     0.197090\n",
       "1     0.197090\n",
       "2     0.460763\n",
       "3...  \n",
       "8   0    -0.037287\n",
       "1    -0.037287\n",
       "2    -0.050604\n",
       "3...  \n",
       "10  0     0.950824\n",
       "1     0.950824\n",
       "2    -1.041379\n",
       "3...  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#  multivariate input data\n",
    "X_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-12-19T14:31:09.516548Z",
     "iopub.status.busy": "2020-12-19T14:31:09.515810Z",
     "iopub.status.idle": "2020-12-19T14:31:32.787406Z",
     "shell.execute_reply": "2020-12-19T14:31:32.788316Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/mloning/Documents/Research/software/sktime/sktime/sktime/transformations/panel/tsfresh.py:164: UserWarning: tsfresh requires a unique index, but found non-unique. To avoid this warning, please make sure the index of X contains only unique values.\n",
      "  \"tsfresh requires a unique index, but found \"\n",
      "Feature Extraction: 100%|██████████| 5/5 [00:18<00:00,  3.69s/it]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dim_0__variance_larger_than_standard_deviation</th>\n",
       "      <th>dim_0__has_duplicate_max</th>\n",
       "      <th>dim_0__has_duplicate_min</th>\n",
       "      <th>dim_0__has_duplicate</th>\n",
       "      <th>dim_0__sum_values</th>\n",
       "      <th>dim_0__abs_energy</th>\n",
       "      <th>dim_0__mean_abs_change</th>\n",
       "      <th>dim_0__mean_change</th>\n",
       "      <th>dim_0__mean_second_derivative_central</th>\n",
       "      <th>dim_0__median</th>\n",
       "      <th>...</th>\n",
       "      <th>dim_5__fourier_entropy__bins_2</th>\n",
       "      <th>dim_5__fourier_entropy__bins_3</th>\n",
       "      <th>dim_5__fourier_entropy__bins_5</th>\n",
       "      <th>dim_5__fourier_entropy__bins_10</th>\n",
       "      <th>dim_5__fourier_entropy__bins_100</th>\n",
       "      <th>dim_5__permutation_entropy__dimension_3__tau_1</th>\n",
       "      <th>dim_5__permutation_entropy__dimension_4__tau_1</th>\n",
       "      <th>dim_5__permutation_entropy__dimension_5__tau_1</th>\n",
       "      <th>dim_5__permutation_entropy__dimension_6__tau_1</th>\n",
       "      <th>dim_5__permutation_entropy__dimension_7__tau_1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>33.334188</td>\n",
       "      <td>110.735119</td>\n",
       "      <td>0.822452</td>\n",
       "      <td>0.000639</td>\n",
       "      <td>0.001751</td>\n",
       "      <td>0.164096</td>\n",
       "      <td>...</td>\n",
       "      <td>0.165443</td>\n",
       "      <td>0.165443</td>\n",
       "      <td>0.165443</td>\n",
       "      <td>0.192626</td>\n",
       "      <td>0.545824</td>\n",
       "      <td>1.279774</td>\n",
       "      <td>1.910772</td>\n",
       "      <td>2.565051</td>\n",
       "      <td>3.096812</td>\n",
       "      <td>3.567632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>73.888480</td>\n",
       "      <td>220.949429</td>\n",
       "      <td>0.964075</td>\n",
       "      <td>-0.002087</td>\n",
       "      <td>-0.003908</td>\n",
       "      <td>0.613719</td>\n",
       "      <td>...</td>\n",
       "      <td>0.096509</td>\n",
       "      <td>0.096509</td>\n",
       "      <td>0.261160</td>\n",
       "      <td>0.261160</td>\n",
       "      <td>0.451359</td>\n",
       "      <td>1.313299</td>\n",
       "      <td>1.987599</td>\n",
       "      <td>2.593635</td>\n",
       "      <td>3.173890</td>\n",
       "      <td>3.696247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-17.428760</td>\n",
       "      <td>7.940863</td>\n",
       "      <td>0.170422</td>\n",
       "      <td>0.002326</td>\n",
       "      <td>-0.000244</td>\n",
       "      <td>-0.152038</td>\n",
       "      <td>...</td>\n",
       "      <td>0.223718</td>\n",
       "      <td>0.261160</td>\n",
       "      <td>0.356468</td>\n",
       "      <td>0.545824</td>\n",
       "      <td>1.821690</td>\n",
       "      <td>1.438857</td>\n",
       "      <td>2.291659</td>\n",
       "      <td>3.140440</td>\n",
       "      <td>3.819994</td>\n",
       "      <td>4.207710</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-18.154841</td>\n",
       "      <td>5.568890</td>\n",
       "      <td>0.135705</td>\n",
       "      <td>0.001051</td>\n",
       "      <td>0.000688</td>\n",
       "      <td>-0.196623</td>\n",
       "      <td>...</td>\n",
       "      <td>0.399949</td>\n",
       "      <td>0.705356</td>\n",
       "      <td>1.127853</td>\n",
       "      <td>1.742820</td>\n",
       "      <td>3.274497</td>\n",
       "      <td>1.683010</td>\n",
       "      <td>2.766048</td>\n",
       "      <td>3.748502</td>\n",
       "      <td>4.303872</td>\n",
       "      <td>4.449241</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>395.985445</td>\n",
       "      <td>11192.658970</td>\n",
       "      <td>6.583700</td>\n",
       "      <td>0.099344</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8.608970</td>\n",
       "      <td>...</td>\n",
       "      <td>0.165443</td>\n",
       "      <td>0.165443</td>\n",
       "      <td>0.165443</td>\n",
       "      <td>0.165443</td>\n",
       "      <td>0.706253</td>\n",
       "      <td>1.483926</td>\n",
       "      <td>2.279149</td>\n",
       "      <td>3.014130</td>\n",
       "      <td>3.525453</td>\n",
       "      <td>3.919983</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 4638 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   dim_0__variance_larger_than_standard_deviation  dim_0__has_duplicate_max  \\\n",
       "0                                             0.0                       0.0   \n",
       "1                                             1.0                       0.0   \n",
       "2                                             0.0                       0.0   \n",
       "3                                             0.0                       0.0   \n",
       "4                                             1.0                       0.0   \n",
       "\n",
       "   dim_0__has_duplicate_min  dim_0__has_duplicate  dim_0__sum_values  \\\n",
       "0                       0.0                   1.0          33.334188   \n",
       "1                       0.0                   1.0          73.888480   \n",
       "2                       0.0                   1.0         -17.428760   \n",
       "3                       0.0                   1.0         -18.154841   \n",
       "4                       0.0                   1.0         395.985445   \n",
       "\n",
       "   dim_0__abs_energy  dim_0__mean_abs_change  dim_0__mean_change  \\\n",
       "0         110.735119                0.822452            0.000639   \n",
       "1         220.949429                0.964075           -0.002087   \n",
       "2           7.940863                0.170422            0.002326   \n",
       "3           5.568890                0.135705            0.001051   \n",
       "4       11192.658970                6.583700            0.099344   \n",
       "\n",
       "   dim_0__mean_second_derivative_central  dim_0__median  ...  \\\n",
       "0                               0.001751       0.164096  ...   \n",
       "1                              -0.003908       0.613719  ...   \n",
       "2                              -0.000244      -0.152038  ...   \n",
       "3                               0.000688      -0.196623  ...   \n",
       "4                               0.000000       8.608970  ...   \n",
       "\n",
       "   dim_5__fourier_entropy__bins_2  dim_5__fourier_entropy__bins_3  \\\n",
       "0                        0.165443                        0.165443   \n",
       "1                        0.096509                        0.096509   \n",
       "2                        0.223718                        0.261160   \n",
       "3                        0.399949                        0.705356   \n",
       "4                        0.165443                        0.165443   \n",
       "\n",
       "   dim_5__fourier_entropy__bins_5  dim_5__fourier_entropy__bins_10  \\\n",
       "0                        0.165443                         0.192626   \n",
       "1                        0.261160                         0.261160   \n",
       "2                        0.356468                         0.545824   \n",
       "3                        1.127853                         1.742820   \n",
       "4                        0.165443                         0.165443   \n",
       "\n",
       "   dim_5__fourier_entropy__bins_100  \\\n",
       "0                          0.545824   \n",
       "1                          0.451359   \n",
       "2                          1.821690   \n",
       "3                          3.274497   \n",
       "4                          0.706253   \n",
       "\n",
       "   dim_5__permutation_entropy__dimension_3__tau_1  \\\n",
       "0                                        1.279774   \n",
       "1                                        1.313299   \n",
       "2                                        1.438857   \n",
       "3                                        1.683010   \n",
       "4                                        1.483926   \n",
       "\n",
       "   dim_5__permutation_entropy__dimension_4__tau_1  \\\n",
       "0                                        1.910772   \n",
       "1                                        1.987599   \n",
       "2                                        2.291659   \n",
       "3                                        2.766048   \n",
       "4                                        2.279149   \n",
       "\n",
       "   dim_5__permutation_entropy__dimension_5__tau_1  \\\n",
       "0                                        2.565051   \n",
       "1                                        2.593635   \n",
       "2                                        3.140440   \n",
       "3                                        3.748502   \n",
       "4                                        3.014130   \n",
       "\n",
       "   dim_5__permutation_entropy__dimension_6__tau_1  \\\n",
       "0                                        3.096812   \n",
       "1                                        3.173890   \n",
       "2                                        3.819994   \n",
       "3                                        4.303872   \n",
       "4                                        3.525453   \n",
       "\n",
       "   dim_5__permutation_entropy__dimension_7__tau_1  \n",
       "0                                        3.567632  \n",
       "1                                        3.696247  \n",
       "2                                        4.207710  \n",
       "3                                        4.449241  \n",
       "4                                        3.919983  \n",
       "\n",
       "[5 rows x 4638 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# tsfresh requires a unique index, so pass X_train with a freshly reset index\n",
    "# to avoid the extractor's non-unique index UserWarning.\n",
    "t = TSFreshFeatureExtractor(default_fc_parameters=\"efficient\", show_warnings=False)\n",
    "Xt = t.fit_transform(X_train.reset_index(drop=True))\n",
    "Xt.head()"
   ]
  },
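  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using tsfresh for forecasting\n",
    "\n",
    "You can also use tsfresh for univariate forecasting. Below, we reduce the forecasting task to time series regression: a pipeline consisting of the tsfresh feature extractor and a random forest regressor is wrapped with `make_reduction`, using a window length of 12. To find out more about forecasting, check out our [forecasting tutorial notebook](https://github.com/alan-turing-institute/sktime/blob/main/examples/01_forecasting.ipynb)."
   ]
  },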
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-12-19T14:31:32.796083Z",
     "iopub.status.busy": "2020-12-19T14:31:32.795215Z",
     "iopub.status.idle": "2020-12-19T14:31:49.386345Z",
     "shell.execute_reply": "2020-12-19T14:31:49.386917Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestRegressor\n",
    "\n",
    "from sktime.datasets import load_airline\n",
    "from sktime.forecasting.base import ForecastingHorizon\n",
    "from sktime.forecasting.compose import make_reduction\n",
    "from sktime.forecasting.model_selection import temporal_train_test_split\n",
    "\n",
    "y = load_airline()\n",
    "y_train, y_test = temporal_train_test_split(y)\n",
    "\n",
    "# Regression pipeline: tsfresh features followed by a random forest.\n",
    "# random_state is fixed so that the forest is reproducible across runs.\n",
    "regressor = make_pipeline(\n",
    "    TSFreshFeatureExtractor(show_warnings=False, disable_progressbar=True),\n",
    "    RandomForestRegressor(random_state=42),\n",
    ")\n",
    "# Reduce forecasting to time series regression, using windows of length 12.\n",
    "forecaster = make_reduction(\n",
    "    regressor, scitype=\"time-series-regressor\", window_length=12\n",
    ")\n",
    "forecaster.fit(y_train)\n",
    "\n",
    "# Absolute horizon: forecast at the time points of the held-out test series.\n",
    "fh = ForecastingHorizon(y_test.index, is_relative=False)\n",
    "y_pred = forecaster.predict(fh)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
